## code to prepare `example_gdc_laml` dataset goes here

# The TCGA LAML annotation and MAF files are copied from maftools.
# The gene expression data is downloaded from https://xenabrowser.net/datapages/?host=https%3A%2F%2Fgdc.xenahubs.net&dataset=TCGA-LAML.htseq_counts.tsv&allSamples=true&removeHub=https%3A%2F%2Fxena.treehouse.gi.ucsc.edu%3A443
# and then subsampled to 500 randomly selected rows.
# Build the example expression table: read the downloaded count matrix,
# shorten the identifiers, draw a reproducible random subset of rows and
# write the subset as a gzipped TSV.
input_file <- "~/Downloads/TCGA-LAML.htseq_counts.tsv.gz"
output_file <- "inst/cohorts/example_TCGA_LAML/tcga_laml_expr_HTSeq_count.tsv.gz"
n_rows <- 500L # number of rows kept in the example dataset

# Fail early with a clear message when the manual download is missing.
if (!file.exists(input_file)) {
  stop("Input file not found: ", input_file, call. = FALSE)
}

expr <- data.table::fread(input_file)
# Quick look at the top-left corner (auto-prints when run interactively).
expr[1:5, 1:5]

# Keep only the first 15 characters of each identifier, modifying `expr` in
# place. NOTE(review): presumably this strips the version suffix from Ensembl
# gene IDs -- confirm against the downloaded file.
expr[, Ensembl_ID := substr(Ensembl_ID, 1, 15)]

if (nrow(expr) < n_rows) {
  stop(
    "Expected at least ", n_rows, " rows in ", input_file,
    " but found ", nrow(expr),
    call. = FALSE
  )
}

# Fixed seed so the same rows are selected on every run. seq_len() is the
# safe form of 1:nrow(expr) and produces the same draw for the same seed.
set.seed(1234L)
idx <- sample(seq_len(nrow(expr)), n_rows)
idx

data <- expr[idx, ]
data.table::fwrite(data, sep = "\t", file = output_file)
